// Building dataset of county level demographic and socioeconomic
// data for robustness checks.
//
// Jeff Shrader & Matthew Gibson
// Creation date: 2014-02-05
// Time-stamp: "2014-09-22 16:34:11 jgs"


// Preliminaries
// Start with no dataset in memory.
clear

// Project root; every file path used below is built from this local.
local work "/DIRECTORY"

// Close any log that is still open (capture suppresses the error when none
// is open), then open a fresh log, overwriting the file from the last run.
capture log close
log using "`work'/Logs/demographic_proc.log", replace
// Do not pause output with -more- prompts.
set more off
set matsize 800
// Reset timer 1 and start it.
// NOTE(review): no matching `timer off 1` or `log close` appears in the
// visible part of this file -- confirm whether they belong at the end.
timer clear 1
timer on 1


// Getting ready for merges
// Baseline county file: FIPS code and time zone code per county
insheet using "`work'/data/county_tz/county_time_zone.csv", clear names

// Hold FIPS as text. Flag every code shorter than five characters in
// fix_fips, then prefix one "0" to each flagged code.
tostring fips, replace
gen fix_fips = length(fips) < 5
replace fips = "0" + fips if fix_fips

// Convert to numeric, ignoring commas embedded in the values
destring pop_2010 area_km area_mi, replace ignore(,)

// Hour offset for each time zone code.
// These are the non-DST offsets, which the original author suspected are
// wrong. The state-level version of this mapping is maintained separately
// in atus_proc.do, so a move to DST offsets has to be made in both places.
// Codes not listed below are left missing.
gen tz_offset = .
replace tz_offset = -5  if time_zone == "E"
replace tz_offset = -6  if time_zone == "C"
replace tz_offset = -7  if inlist(time_zone, "M", "m")
replace tz_offset = -8  if time_zone == "P"
replace tz_offset = -9  if time_zone == "A"
replace tz_offset = -10 if inlist(time_zone, "H", "h")

sort fips
save "`work'/data/county_tz/county_time_zone.dta", replace

// Lat lon
// Re-save the QCEW county latitude/longitude file, sorted by fips, next to
// the time zone file so the merge step can read it from there.
use "`work'/data/qcew/County_latlong.dta", clear
sort fips
save "`work'/data/county_tz/county_latlong.dta", replace

// Demo
// Convert each demographic CSV into a .dta file keyed on a text fips code.
// The k-th name in csv_names is read and written out under the k-th name in
// dta_names. All four files go through identical steps:
//   1. read the CSV, taking variable names from the first row
//   2. convert every column that is numeric once commas are ignored
//   3. turn fips back into text and prefix one "0" to codes shorter than
//      five characters
//   4. sort on fips and save
// Every output is written under the lower-case "data" directory, which is
// where the merge step reads from. education.dta was previously written
// under "Data", which is a different directory on a case-sensitive file
// system.
local csv_names "education population_estimates poverty_estimates unemployment"
local dta_names "education population poverty unemployment"
local n_files : word count `csv_names'

forvalues k = 1/`n_files' {
   local csv : word `k' of `csv_names'
   local dta : word `k' of `dta_names'

   insheet using "`work'/data/demographics/`csv'.csv", clear names
   destring *, replace ignore(,)
   tostring fips, replace
   replace fips = "0" + fips if length(fips) < 5
   sort fips
   save "`work'/data/demographics/`dta'.dta", replace
}


// Merging
// Start from the county time zone file and attach latitude/longitude.
// Each merge records its match status in its own <name>_merge variable
// instead of the default _merge, so successive merges do not collide.
use "`work'/data/county_tz/county_time_zone.dta", clear
merge 1:1 fips using "`work'/data/county_tz/county_latlong.dta", generate(lat_merge)
save "`work'/data/county_tz/county_time_zone_latlong.dta", replace

// Attach the demographic files one at a time, in this order. The k-th file
// in demo_files gets the k-th prefix in demo_flags for its merge indicator.
local demo_files "population education poverty unemployment"
local demo_flags "pop educ pov unemp"
local n_demo : word count `demo_files'

forvalues k = 1/`n_demo' {
   local file : word `k' of `demo_files'
   local flag : word `k' of `demo_flags'
   merge 1:1 fips using "`work'/data/demographics/`file'.dta", generate(`flag'_merge)
}



// Time zone distance
// east_long is the largest longitude among the rows sharing a time_zone
// code; tzdistance is each row's gap to that value, plus one.
// NOTE(review): groups are formed on the raw time_zone code, so "M"/"m" and
// "H"/"h" are separate groups here even though tz_offset gives each pair the
// same offset -- confirm this is intended.
sort time_zone
by time_zone: egen east_long = max(longitude)
gen tzdistance = east_long - longitude + 1

// equinox sunset time
// Every row is evaluated on 23 March 2010. solar_calculator is not defined
// in this file -- presumably a user-written command that adds the solar
// variables; verify what it creates.
gen date = mdy(3, 23, 2010)
solar_calculator date latitude longitude tz_offset

// Puerto Rico is excluded from the combined file
keep if state != "PR"

sort fips
saveold "`work'/data/demographics/demographics_comb.dta", replace


// Census population and housing density, county level then state level
insheet using "`work'/data/census/census_pop_density.csv", comma clear

// Keep only the rows whose id equals target_geo1, then read the county
// code from characters 10-14 of id and the state code from its first two.
keep if id == target_geo1
gen fips = substr(id,10,5)
gen state_fips = substr(fips,1,2)

// The count columns are text with a parenthesised suffix on some values.
// Split at "(", keep the part in front of it under the original name, and
// convert that part to numeric.
foreach v in population housing_units {
   split `v', p("(") gen(pop)
   drop pop2 `v'
   rename pop1 `v'
   destring `v', replace
}

// Density columns: ignore the characters "(", "X" and ")" when converting
foreach v in pop_density_land housing_density_land {
   destring `v', ignore("(X)") replace
}

keep fips state_fips pop_den* population* housing_* area_*
sort fips
saveold "`work'/data/census/census_pop_density.dta", replace

// State-level file: the unweighted mean across each state's rows of every
// measure, stored with a _state suffix.
local measures "population housing_units pop_density_land housing_density_land"
collapse (mean) `measures', by(state_fips)
foreach v of local measures {
   rename `v' `v'_state
}
sort state_fips
saveold "`work'/data/census/census_pop_density_state.dta", replace


// Labeling
/*


Education
Less than high school:
For 1970 and 1980, the share of adults with less than high school includes those who had not completed the 12th grade.  In 1990, 2000, 2006-2010 the share includes those who did not receive a high school diploma or its equivalent (such as a GED), but did not report college experience. 

High school only:
For 1970 and 1980, the share of adults with high school only includes those who completed 12th grade only.  In 1990,  2000, and 2006-2010 the share includes those who completed 12th grade and received a high school diploma or its equivalent (such as a GED), but did not report college experience.

Some college:
For 1970 and 1980, the share of adults with some college includes those who completed from one to three years of college.  In 1990, 2000, and 2006-2010 the share includes those who reported completing at least one year of college but did not receive a bachelor's degree.

College graduate:
For 1970 and 1980, the share of adults who are college graduates includes those who completed four or more years of college regardless of degree earned.  In 1990, 2000, and 2006-2010 the share includes those who received a bachelor's or higher degree.



Population
Column variable	Description	Notes
FIPStxt	State-County FIPS Code	
State	State name	
Area_Name	Area name	
Rural-urban_Continuum Code_2003	Rural-urban Continuum Code, 2003	http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UVRRXjcTSHs
Urban_Influence_Code_2003	Urban Influence Code, 2003	http://www.ers.usda.gov/data-products/urban-influence-codes.aspx#.UVRRmzcTSHs
Rural-urban_Continuum Code_2013	Rural-urban Continuum Code, 2013	http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UjyXuH9tdWo
Urban_Influence_Code_2013	Urban Influence Code, 2013	http://www.ers.usda.gov/data-products/urban-influence-codes.aspx#.UVRRmzcTSHs
CENSUS_2010_POP	4/1/2010 resident Census 2010 population	
ESTIMATES_BASE_2010	4/1/2010 resident total population estimates base	
POP_ESTIMATE_2010	7/1/2010 resident total population estimate	
POP_ESTIMATE_2011	7/1/2011 resident total population estimate	
POP_ESTIMATE_2012	7/1/2012 resident total population estimate	
N_POP_CHG_2010	Numeric Change in resident total population 4/1/2010 to 7/1/2010	
N_POP_CHG_2011	Numeric Change in resident total population 7/1/2010 to 7/1/2011	
N_POP_CHG_2012	Numeric Change in resident total population 7/1/2011 to 7/1/2012	
Births_2010	Births in period 4/1/2010 to 6/30/2010	
Births_2011	Births in period 7/1/2010 to 6/30/2011	
Births_2012	Births in period 7/1/2011 to 6/30/2012	
Deaths_2010	Deaths in period 4/1/2010 to 6/30/2010	
Deaths_2011	Deaths in period 7/1/2010 to 6/30/2011	
Deaths_2012	Deaths in period 7/1/2011 to 6/30/2012	
NATURAL_INC_2010	Natural increase in period 4/1/2010 to 6/30/2010	
NATURAL_INC_2011	Natural increase in period 7/1/2010 to 6/30/2011	
NATURAL_INC_2012	Natural increase in period 7/1/2011 to 6/30/2012	
INTERNATIONAL_MIG_2010	Net international migration in period 4/1/2010 to 6/30/2010	
INTERNATIONAL_MIG_2011	Net international migration in period 7/1/2010 to 6/30/2011	
INTERNATIONAL_MIG_2012	Net international migration in period 7/1/2011 to 6/30/2012	
DOMESTIC_MIG_2010	Net domestic migration in period 4/1/2010 to 6/30/2010	
DOMESTIC_MIG_2011	Net domestic migration in period 7/1/2010 to 6/30/2011	
DOMESTIC_MIG_2012	Net domestic migration in period 7/1/2011 to 6/30/2012	
NET_MIG_2010	Net migration in period 4/1/2010 to 6/30/2010	
NET_MIG_2011	Net migration in period 7/1/2010 to 6/30/2011	
NET_MIG_2012	Net migration in period 7/1/2011 to 6/30/2012	
RESIDUAL_2010	Residual for period 4/1/2010 to 6/30/2010	
RESIDUAL_2011	Residual for period 7/1/2010 to 6/30/2011	
RESIDUAL_2012	Residual for period 7/1/2011 to 6/30/2012	
GQ_ESTIMATES_BASE_2010	4/1/2010 Group Quarters total population estimates base	
GQ_ESTIMATES_2010	7/1/2010 Group Quarters total population estimate	
GQ_ESTIMATES_2011	7/1/2011 Group Quarters total population estimate	
GQ_ESTIMATES_2012	7/1/2012 Group Quarters total population estimate	
R_birth_2011	Birth rate in period 7/1/2010 to 6/30/2011	
R_birth_2012	Birth rate in period 7/1/2011 to 6/30/2012	
R_death_2011	Death rate in period 7/1/2010 to 6/30/2011	
R_death_2012	Death rate in period 7/1/2011 to 6/30/2012	
R_NATURAL_INC_2011	Natural increase rate in period 7/1/2010 to 6/30/2011	
R_NATURAL_INC_2012	Natural increase rate in period 7/1/2011 to 6/30/2012	
R_INTERNATIONAL_MIG_2011	Net international migration rate in period 7/1/2010 to 6/30/2011	
R_INTERNATIONAL_MIG_2012	Net international migration rate in period 7/1/2011 to 6/30/2012	
R_DOMESTIC_MIG_2011	Net domestic migration rate in period 7/1/2010 to 6/30/2011	
R_DOMESTIC_MIG_2012	Net domestic migration rate in period 7/1/2011 to 6/30/2012	
R_NET_MIG_2011	Net migration rate in period 7/1/2010 to 6/30/2011	
R_NET_MIG_2012	Net migration rate in period 7/1/2011 to 6/30/2012	
		
		
Sources: Census Bureau Population Estimates: http://www.census.gov/popest/data/index.html		
USDA, Economic Research Service, Rural Classifications: http://www.ers.usda.gov/topics/rural-economy-population/rural-classifications.aspx#.UVRRKjcTSHs		




Poverty
Column variable name	Description	Notes
FIPStxt	State-County FIPS Code	
State	State Abbreviation	
Area_name	Area name	
Rural-urban_Continuum Code_2003	Rural-urban Continuum Code, 2003	http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UjyXuH9tdWo
Urban_Influence_Code_2003	Urban Influence Code, 2003	http://www.ers.usda.gov/data-products/urban-influence-codes.aspx#.UVRRmzcTSHs
Rural-urban_Continuum Code_2013	Rural-urban Continuum Code, 2013	http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UjyXuH9tdWo
Urban_Influence_Code_2013	Urban Influence Code, 2013	http://www.ers.usda.gov/data-products/urban-influence-codes.aspx#.UVRRmzcTSHs
POVALL_2011	Estimate of people of all ages in poverty 2011	
CI90LBAll_2011	90% confidence interval lower bound of estimate of people of all ages in poverty 2011	
CI90UBALL_2011	90% confidence interval upper bound of estimate of people of all ages in poverty 2011	
PCTPOVALL_2011	Estimated percent of people of all ages in poverty 2011	
CI90LBALLP_2011	90% confidence interval lower bound of estimate of percent of people of all ages in poverty 2011	
CI90UBALLP_2011	90% confidence interval upper bound of estimate of percent of people of all ages in poverty 2011	
POV017_2011	Estimate of people age 0-17 in poverty 2011	
CI90LB017_2011	90% confidence interval lower bound of estimate of people age 0-17 in poverty 2011	
CI90UB017_2011	90% confidence interval upper bound of estimate of people age 0-17 in poverty 2011	
PCTPOV017_2011	Estimated percent of people age 0-17 in poverty 2011	
CI90LB017P_2011	90% confidence interval lower bound of estimate of percent of people age 0-17 in poverty 2011	
CI90UB017P_2011	90% confidence interval upper bound of estimate of percent of people age 0-17 in poverty 2011	
POV517_2011	Estimate of related children age 5-17 in families in poverty 2011	
CI90LB517_2011	90% confidence interval lower bound of estimate of related children age 5-17 in families in poverty 2011	
CI90UB517_2011	90% confidence interval upper bound of estimate of related children age 5-17 in families in poverty 2011	
PCTPOV517_2011	Estimated percent of related children age 5-17 in families in poverty 2011	
CI90LB517P_2011	90% confidence interval lower bound of estimate of percent of related children age 5-17 in families in poverty 2011	
CI90UB517P_2011	90% confidence interval upper bound of estimate of percent of related children age 5-17 in families in poverty 2011	
MEDHHINC_2011	Estimate of median household income 2011	
CI90LBINC_2011	90% confidence interval lower bound of estimate of median household income 2011	
CI90UBINC_2011	90% confidence interval upper bound of estimate of median household income 2011	
POV05_2011	Estimate of people under age 5 in poverty 2011	
CI90LB05_2011	90% confidence interval lower bound of estimate of people under age 5 in poverty 2011	
CI90UB05_2011	90% confidence interval upper bound of estimate of people under age 5 in poverty 2011	
PCTPOV05_2011	Estimated percent of people under age 5 in poverty 2011	
CI90LB05P_2011	90% confidence interval lower bound of estimate of percent of people under age 5 in poverty 2011	
CI90UB05P_2011	90% confidence interval upper bound of estimate of percent of people under age 5 in poverty 2011	
		
Sources: Census Bureau Population Estimates: http://www.census.gov/popest/data/index.html		
USDA, Economic Research Service, Rural Classifications: http://www.ers.usda.gov/topics/rural-economy-population/rural-classifications.aspx#.UVRRKjcTSHs		
*/
